library(tidyverse)

# Read the raw records; the path is resolved against the working directory.
df_main <- read.csv("main_df.csv")

# Build the plotting data in steps:
#   1. keep rows whose GENDER is "F" or "M",
#   2. drop rows whose MAJOR_DISCIPLINE is "Other",
#   3. keep rows whose US_CITIZENSHIP_STATUS is "U.S. Citizen" or
#      "Perm Resident",
#   4. retain only the gender and major columns,
#   5. add display-named copies (Gender, Major) alongside the originals.
df <- df_main %>%
  filter(GENDER %in% c("F", "M")) %>%
  filter(MAJOR_DISCIPLINE != "Other") %>%
  filter(US_CITIZENSHIP_STATUS %in% c("U.S. Citizen", "Perm Resident")) %>%
  select(GENDER, MAJOR_DISCIPLINE) %>%
  mutate(Gender = GENDER, Major = MAJOR_DISCIPLINE)

# Bar chart of intended major, with bars ordered from most to least frequent
# major (fct_infreq) and filled by gender. Y-axis breaks are placed every
# 1000 from 0 to 15000.
#
# The plot object is built BEFORE the PDF device is opened, so an error while
# constructing it cannot leave a device open.
major_plot <- ggplot(df, aes(x = fct_infreq(Major), fill = Gender)) +
  geom_bar(stat = "count", width = 0.5) +
  labs(
    title = "Intended Major by Gender",
    x = "\nIntended Major",
    y = "\nCount"
  ) +
  scale_y_continuous(breaks = seq(from = 0, to = 15000, by = 1000)) +
  theme_light()

# ggplot objects are only drawn when printed. Top-level auto-printing does not
# happen under source() (with its default print.eval = FALSE) or inside
# functions/loops, which would leave the PDF blank -- so print explicitly.
pdf("major_counts_by_major.pdf", width = 9)
print(major_plot)
dev.off()

